package com.example.springbootmvc.contoller;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.ResponseBody;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Random;

/**
 * Controller exposing a single endpoint that downloads a fixed range of chapter pages,
 * strips a fixed set of HTML tags from each chapter body and appends the result to a
 * local text file.
 */
@Controller
public class SpringController {

    /** File that receives the concatenated chapter text; truncated on every run. */
    private static final String OUTPUT_PATH = "d:/battle-through-the-heavens.txt";

    /** Chapter page URL without the trailing chapter number. */
    private static final String CHAPTER_URL_PREFIX =
            "https://www.wuxiaworld.com/novel/battle-through-the-heavens/btth-chapter-";

    /** CSS selector locating the chapter body inside a downloaded page. */
    private static final String CONTENT_SELECTOR =
            "#content-container > div.section > div.section-content div.panel.panel-default div.p-15 div.fr-view";

    /** First chapter number requested (inclusive). */
    private static final int FIRST_CHAPTER = 1;

    /** Last chapter number requested (inclusive). */
    private static final int LAST_CHAPTER = 1648;

    /** Chapter whose URL carries an extra "-1" suffix (presumably how the site names that page). */
    private static final int SUFFIXED_CHAPTER = 390;

    /** Upper bound on download attempts per chapter, so a permanently failing page cannot stall the run. */
    private static final int MAX_FETCH_ATTEMPTS = 5;

    /** Base delay between failed attempts; multiplied by the attempt number (linear back-off). */
    private static final long RETRY_DELAY_MILLIS = 2000L;

    /** Exclusive upper bound of the random pause inserted after each successfully written chapter. */
    private static final int MAX_PAUSE_MILLIS = 1000;

    /** Literal tags removed from the chapter HTML, in the order they are applied. */
    private static final String[] STRIPPED_TAGS = {
        "<p>", "</p>",
        "<strong>", "</strong>",
        "<span style=\"\">", "<span>", "</span>",
        "<hr>", "<br>"
    };

    /**
     * Downloads every chapter from {@code FIRST_CHAPTER} to {@code LAST_CHAPTER}, removes links and
     * the tags listed in {@code STRIPPED_TAGS} from the chapter body, and writes the text to
     * {@code OUTPUT_PATH} as UTF-8, one line separator after each chapter.
     *
     * <p>A chapter that still cannot be downloaded after {@code MAX_FETCH_ATTEMPTS} attempts is
     * reported on standard error and skipped; the remaining chapters are still processed.
     *
     * @return the fixed string {@code "hello ,world!"} once all chapters have been processed
     * @throws Exception if the output file cannot be opened or written, or if the thread is
     *                   interrupted while pausing between requests
     */
    @GetMapping("/test/hello")
    @ResponseBody
    public String hello() throws Exception {
        Random random = new Random();
        // try-with-resources guarantees the file is closed even when a write or sleep fails.
        // UTF-8 is stated explicitly so the output does not depend on the platform default charset.
        try (Writer writer = new OutputStreamWriter(new FileOutputStream(OUTPUT_PATH), StandardCharsets.UTF_8)) {
            for (int chapter = FIRST_CHAPTER; chapter <= LAST_CHAPTER; chapter++) {
                String url = chapterUrl(chapter);
                Document doc = fetchWithRetry(url);
                if (doc == null) {
                    System.err.println(url + " skipped after " + MAX_FETCH_ATTEMPTS + " failed attempts");
                    continue;
                }

                Elements root = doc.select(CONTENT_SELECTOR);
                // Drop anchor elements before extracting the HTML.
                root.select("a").remove();
                String content = stripMarkup(root.html());

                System.out.println(content);
                writer.write(content);
                writer.write(System.lineSeparator());
                // Flush per chapter so completed chapters reach the file even if a later step fails.
                writer.flush();
                System.out.println(url + ",处理完成");

                // Random pause between requests.
                Thread.sleep(random.nextInt(MAX_PAUSE_MILLIS));
            }
        }
        return "hello ,world!";
    }

    /** Builds the page URL for the given chapter number, adding the "-1" suffix for the special-cased chapter. */
    private static String chapterUrl(int chapter) {
        String url = CHAPTER_URL_PREFIX + chapter;
        return chapter == SUFFIXED_CHAPTER ? url + "-1" : url;
    }

    /**
     * Downloads and parses {@code url}, retrying with a growing delay on I/O failure.
     *
     * @param url page to download
     * @return the parsed document, or {@code null} if every attempt failed
     * @throws InterruptedException if the thread is interrupted while waiting between attempts
     */
    private static Document fetchWithRetry(String url) throws InterruptedException {
        for (int attempt = 1; attempt <= MAX_FETCH_ATTEMPTS; attempt++) {
            try {
                return Jsoup.connect(url).get();
            } catch (IOException e) {
                System.err.println(url + " fetch attempt " + attempt + "/" + MAX_FETCH_ATTEMPTS + " failed: " + e);
                if (attempt < MAX_FETCH_ATTEMPTS) {
                    // Wait before retrying instead of immediately re-requesting the page.
                    Thread.sleep(RETRY_DELAY_MILLIS * attempt);
                }
            }
        }
        return null;
    }

    /** Removes every literal tag in {@code STRIPPED_TAGS} from {@code html}; all other text is left untouched. */
    private static String stripMarkup(String html) {
        String text = html;
        for (String tag : STRIPPED_TAGS) {
            // Literal replacement: none of the tags need regex semantics.
            text = text.replace(tag, "");
        }
        return text;
    }
}
